(* Content-type: application/mathematica *)

(*** Wolfram Notebook File ***)
(* http://www.wolfram.com/nb *)

(* CreatedBy='Mathematica 7.0' *)

(*CacheID: 234*)
(* Internal cache information:
NotebookFileLineBreakTest
NotebookFileLineBreakTest
NotebookDataPosition[       145,          7]
NotebookDataLength[     13960,        356]
NotebookOptionsPosition[     11997,        288]
NotebookOutlinePosition[     12559,        309]
CellTagsIndexPosition[     12516,        306]
WindowFrame->Normal*)

(* Beginning of Notebook Content *)
Notebook[{

Cell[CellGroupData[{
Cell["Microarray Gene Expression Analysis", "Title",
 CellChangeTimes->{{3.4448837771488934`*^9, 3.444883784562468*^9}}],

Cell["Statistical Analysis", "Subtitle",
 CellChangeTimes->{{3.444883789132296*^9, 3.4448838210671864`*^9}, {
  3.4453908749557123`*^9, 3.445390883016062*^9}, {3.445390932829582*^9, 
  3.445390935477747*^9}, {3.445391046435865*^9, 3.445391047601157*^9}}],

Cell["Hans - Martin Will, Ph.D.", "Subsubtitle"],

Cell[TextData[{
 "Copyright \[Copyright] 2009 Hans-Martin Will. This notebook is licensed \
under the ",
 ButtonBox["Creative Commons Attribution 3.0 License",
  BaseStyle->"Hyperlink",
  ButtonData->{
    URL["http://creativecommons.org/licenses/by/3.0"], None},
  ButtonNote->"http://creativecommons.org/licenses/by/3.0"],
 ". Distributions of this notebook in modified or unmodified form need to \
include this notice."
}], "Text",
 CellChangeTimes->{{3.444885583784873*^9, 3.44488571568578*^9}, 
   3.444885750604946*^9, {3.4448857808997927`*^9, 3.444885794228957*^9}, {
   3.444885849778867*^9, 3.444885895263208*^9}, {3.444885988618767*^9, 
   3.444886023721356*^9}, {3.444886679102549*^9, 3.4448867925795593`*^9}, {
   3.444886828004525*^9, 3.444886879056986*^9}, {3.4448869480613832`*^9, 
   3.44488696792741*^9}, {3.445135694625877*^9, 3.445135722559115*^9}, {
   3.445228305643272*^9, 3.445228330267249*^9}, {3.4453935930912523`*^9, 
   3.445393593976077*^9}},
 FontWeight->"Bold"],

Cell[BoxData[
 GraphicsBox[
  TagBox[RasterBox[CompressedData["
1:eJztmU1PG1cUhi11HcSeRcku7GirdJlaSn8A7aqbVCh/IFF3kaiCIhIpUiVk
OYraRUOl7krAxubDX9gG2/hjbI8HQrb8BH6C+8yc+DDYxgzWGLtKj8bSzJ07
d+a+877vOXd89/GTHx9/EQgEvuT3Ez97v91uBz77UBB2Utt7md1kNpE5SGcL
2YNSvlApHNVKlXq52qgazVrdNOqtur2ZhmHWao0qp+hQrBTonCtkuTCZTTII
Q8UTsa29aHQnsrm9sRF/z/Y+tr6+9c9kboqGgJDKJfcPM/lirlA+ZILVRoX5
Nq1G68S0PrSOT49PPtobO9YHi0ZOGaZBNzpzSb6U43IGcUMR2dkUKMBh8qG4
AKGU5/2WjTIEYJpMf+XlysPvH96ZuuNmEYc0rrxaoUPzuAlccKNYLUKM/cN9
hSKW2IruRmwo4hvDUeLZ0rO5uTn3rTmkcUQ4IAcFgRnVWwavOxQOzczMDJYV
HUJvQnRGLxCj9AmKjAhkOxmP7dlQDEGJ8O/he3P35C7BYPCpE+xIC6fo4DsO
SBs5dECo85YXfljQyU5PTy8uLi4vL+ecYIdDGrUDnYvlQsOyoYAVCMTxisRu
eudCHTehxNrf76ampuyRFxbOzs7aruCQRk7RwUcoZCIYIwJHDjDBDQKTZdbn
5+ftfrG2tqZo8IKAAgxBkqGwzXQ+ldjfs9VxmRJenkqYANp6L3V1vbXc1F8c
IDNehyfAcAVhfn6+6130BhDRTVnB5XgFQ3XU0aHEbjSyvelRGshfmPDx9PQq
HAhhhV9eIeMXbEVUMEY8QUG4igYDoMArsE1bHZVCrmhTwnaJlO0SIg0vOIgx
XvsK6CC26SMONhnMGooQY4Ttpml6AUGhEIFwOYOQTIUSmQObEpo4PEpDjNE9
PgKR53QrhRDb9BEHRN2wGqRIOcQTvIMgIYIlSKbwSlwi6+RQlYZ3HEgN7sHd
GcrdTjd/caBiNE9MSgIhgyqC7IAMg04oOO7G1dVVfSqhBIPgElSbl6SRjNtZ
o2MRE4sDDgmfpVhS7jHfwOVg4pFIpKtR+wt7GYRqE5WVnASazqfJGliEZM8J
1wVLhuNTS/b1FYv7zc7O4khgwg7kl0ZJJWAijV3SoPCuX1hEWrOnRxz6+mQv
GUbhk5QNLBxknym7b93lFQMMRPnDUBQSR7Wj4XDQvDkYh1HkzYnCYd1DHSXO
4HsdNVgX2CZplB2mr7pwN/qri/Xx1dV9fbKvJQ7wSXlHjk+2GHBon1QodJ0F
7NyF8bVgG9E6S/KmVNSkPzfVe/MmUAQ74RYI3Ah08ma1kzdTuRvnTfd2y+tu
qaO0qHZXBR6jt446dOooFuBSR910qXXLmzx8qdpdV19b3rvD37p6jDjoOuvd
X3+qJIdZZ4VDDMI6iwFl6a1fY7yvs8aIA7H0fEnW3Y9+fqRQXLvagjbd6+5m
7cXLF4H/cjz95Qn5ruXtOwyNnNLvMPe/vY8iqEPAc3wz8C1+fb5ECQQUyoqA
t+9ygNBo1d/+8dbHh1HM9dB9yscb9Q0bCtP+TotXePpOG5bvtIa/IARck+3a
uQUQJBCIfrdnmld9tw+9Cdnf7a0GnUchhy4+aKPvNxoQX3/zVSQWIZlSV5if
/sexOv/jWBzq/zjReOTBdw9G8QzjUkRvMMHXv73O5DOd//WczTQ4pJFTI0JA
YhL48BnG/wgHHBD+BZWSnD0=
    "], {{0, 31}, {88, 0}}, {0, 255},
    ColorFunction->RGBColor],
   BoxForm`ImageTag["Byte", ColorSpace -> "RGB", Interleaving -> True],
   Selectable->False],
  BaseStyle->"ImageGraphics",
  ImageSize->Automatic,
  ImageSizeRaw->{88, 31},
  PlotRange->{{0, 88}, {0, 31}}]], "Text"],

Cell[CellGroupData[{

Cell["Introduction", "Section",
 CellChangeTimes->{{3.444452911443905*^9, 3.4444529126249113`*^9}, {
  3.444453002881036*^9, 3.444453009109371*^9}, {3.4444531720085497`*^9, 
  3.44445317619203*^9}}],

Cell[CellGroupData[{

Cell["Gene Expression Microarrays", "Subsection",
 CellChangeTimes->{{3.4452284697091217`*^9, 3.445228482800082*^9}}],

Cell["\<\
DNA Microarrays organize DNA oligonucleotides in a spatial arrangement on a \
small glass plate, plastic or silicon substrate. Microarrays can contain up \
to millions of probes, and can be used to perform many genetic or genomic \
tests in parallel. The use of microarrays for gene expression profiling was \
first reported by Schena et al. (1995), and the representation of a complete \
eukaryotic genome (Saccharomyces cerevisiae) on a microarray was reported by \
Lashkari et al. (1997).\
\>", "Text",
 CellChangeTimes->{{3.444887353346092*^9, 3.444887363050329*^9}, {
  3.44488741517548*^9, 3.44488744377776*^9}, {3.4448874788086863`*^9, 
  3.4448877124559107`*^9}}],

Cell["\<\
For gene expression profiling experiments, the expression levels of thousands \
of gene transcripts are simultaneously monitored to study and determine the \
effects of treatments, disease conditions or developmental conditions. For \
example, gene expression profiling can be used to identify genes whose \
expression is changed when exposed to toxicants or genes expressed \
differently in tumor cells and cancer cell lines in comparison to non-tumor \
tissue. \
\>", "Text",
 CellChangeTimes->{{3.4448877585607033`*^9, 3.444887985660445*^9}}],

Cell["\<\
A major design criterion for manufacturing microarrays is the choice between \
single-channel and two-channel arrays. Single-channel arrays are designed to \
measure estimates of absolute abundance of transcript expression. For \
single-channel arrays, a single sample preparation is hybridized to the \
array. For comparative analysis across conditions, expression levels need to \
be compared between data points collected from multiple arrays. Common \
commercial single-channel arrays are those manufactured and distributed by \
Affymetrix \"Gene Chip\", the Applied Microarrays \"CodeLink\" arrays, and \
the Eppendorf \"DualChip & Silverquant\". \
\>", "Text",
 CellChangeTimes->{{3.4448880180829897`*^9, 3.444888260869218*^9}, {
  3.444888301776121*^9, 3.444888349899485*^9}, {3.444888603426197*^9, 
  3.4448886074266253`*^9}}],

Cell["\<\
When using dual-channel arrays, cDNA prepared from two different samples is \
first labeled with different fluorescent dyes before being hybridized to the \
array. Common dyes are Cy3 with a fluorescence emission wavelength of 570 nm \
(green part of the light spectrum), and Cy5 with an emission wavelength of \
670 nm (red part of the light spectrum). The relative intensities of each \
fluorophore can then be used to perform ratio-based analysis to identify \
genes with different expression levels in the two samples.\
\>", "Text",
 CellChangeTimes->{{3.444888336265398*^9, 3.444888582753827*^9}}]
}, Open  ]],

Cell[CellGroupData[{

Cell["Lung Cancer Data Set", "Subsection",
 CellChangeTimes->{{3.445228498389434*^9, 3.4452285023921947`*^9}}],

Cell[TextData[{
 "For our computational example we will use helper functions from the \
Bioinformatica package. As our data set, we will use one discussed by \
Raponi et al. (2006), which has been deposited in the NCBI GEO data \
repository under accession code GDS2373. In this study, primary squamous cell \
lung carcinomas from 129 patients have been profiled using Affymetrix U133A \
gene chips. Non-small-cell lung cancers (NSCLC) compose 80% of all lung \
carcinomas, with squamous cell carcinomas (SCC) and adenocarcinomas \
representing the majority of these tumors. These data are of interest because \
a prognostic signature could be used to identify patients with early-stage \
high-risk NSCLC who might benefit from adjuvant therapy following surgery. In \
this first tutorial, we will be more concerned with the overall workflow of \
an analysis of microarrays using ",
 StyleBox["Mathematica",
  FontSlant->"Italic"],
 " rather than focusing on the most advisable statistical approaches. Those \
more advanced discussions might be the focus of specialized tutorials at a \
later stage. "
}], "Text",
 CellChangeTimes->{{3.4451679609001017`*^9, 3.4451679937817497`*^9}, {
  3.445168024196751*^9, 3.445168024914763*^9}, {3.445168086468685*^9, 
  3.4451681136165743`*^9}, {3.4451683264976883`*^9, 3.445168371253716*^9}, {
  3.445168675471032*^9, 3.4451686848692408`*^9}, {3.445168829705173*^9, 
  3.445168863678011*^9}, {3.4451688958378773`*^9, 3.445168935909285*^9}, {
  3.445168974416506*^9, 3.44516897847499*^9}, {3.445228520174803*^9, 
  3.445228520521166*^9}}]
}, Open  ]],

Cell[CellGroupData[{

Cell["Accessing the Bioinformatica Package", "Subsection",
 CellChangeTimes->{{3.445228523247863*^9, 3.445228533927959*^9}}],

Cell[TextData[{
 "Throughout this tutorial we will make use of a small package called ",
 StyleBox["Bioinformatica`",
  FontWeight->"Bold"],
 " containing useful bioinformatics helper functions."
}], "Text",
 CellChangeTimes->{{3.445224988580904*^9, 3.445225082579658*^9}, {
   3.4452283658901043`*^9, 3.4452284581917267`*^9}, 3.445228517079393*^9}],

Cell[BoxData[
 RowBox[{"<<", "Bioinformatica`"}]], "Input",
 CellChangeTimes->{{3.445134528978896*^9, 3.445134556239543*^9}, {
  3.445136013849811*^9, 3.445136015415406*^9}, {3.445167942995329*^9, 
  3.4451679471910343`*^9}}]
}, Open  ]]
}, Open  ]],

Cell[CellGroupData[{

Cell["Statistical Models", "Section",
 CellChangeTimes->{{3.444453071572562*^9, 3.4444530750282803`*^9}, {
   3.44445341277978*^9, 3.444453412807624*^9}, {3.444454425691415*^9, 
   3.444454430803143*^9}, 3.444454466175186*^9, {3.4452258312378397`*^9, 
   3.4452258335887127`*^9}}],

Cell["Experimental Design", "Subsection",
 CellChangeTimes->{{3.445225851803762*^9, 3.445225855845324*^9}}],

Cell["Analysis of Variance (ANOVA)", "Subsection",
 CellChangeTimes->{{3.444454438417139*^9, 3.444454442288773*^9}, {
  3.445225844518867*^9, 3.445225846213574*^9}}],

Cell["Fitting Linear Models", "Subsection",
 CellChangeTimes->{{3.444454433657465*^9, 3.4444544378827877`*^9}}]
}, Open  ]],

Cell[CellGroupData[{

Cell["Next Steps and Further Reading", "Section",
 CellChangeTimes->{{3.444453413580114*^9, 3.44445342674747*^9}, {
  3.444454006056712*^9, 3.444454008584454*^9}, {3.445393337107728*^9, 
  3.4453933395713587`*^9}}],

Cell[CellGroupData[{

Cell["Next Steps", "Subsection",
 CellChangeTimes->{{3.445393344499836*^9, 3.44539335509347*^9}}],

Cell["statistical tests", "Text",
 CellChangeTimes->{{3.445393362381115*^9, 3.445393363155573*^9}, {
  3.445393414050374*^9, 3.4453934184396887`*^9}}],

Cell["variations of ANOVA", "Text",
 CellChangeTimes->{{3.445393423550606*^9, 3.445393429531309*^9}}],

Cell["more on regression analysis", "Text",
 CellChangeTimes->{{3.4453934393444147`*^9, 3.445393443870955*^9}}]
}, Open  ]],

Cell[CellGroupData[{

Cell["Further Reading", "Subsection",
 CellChangeTimes->{{3.445393369860952*^9, 3.445393372765326*^9}}],

Cell["blah", "Text",
 CellChangeTimes->{{3.445393379675606*^9, 3.445393380398409*^9}}]
}, Open  ]],

Cell["References", "Subsection",
 CellChangeTimes->{{3.4449222013566933`*^9, 3.444922202799799*^9}}],

Cell[CellGroupData[{

Cell["Links", "Subsection",
 CellChangeTimes->{{3.444922226882545*^9, 3.4449222275050173`*^9}}],

Cell[TextData[{
 StyleBox["The author can be contacted via email: ",
  FontSlant->"Italic"],
 ButtonBox["hwill@acm.org",
  BaseStyle->"Hyperlink",
  ButtonData->{
    URL["mailto:hwill@acm.org"], None},
  ButtonNote->"mailto:hwill@acm.org"],
 StyleBox[".",
  FontSlant->"Italic"]
}], "Text",
 CellChangeTimes->{{3.444954375605266*^9, 3.44495441355757*^9}, {
  3.444954451426589*^9, 3.4449544980252037`*^9}}]
}, Open  ]]
}, Open  ]]
}, Open  ]]
},
WindowSize->{961, 827},
WindowMargins->{{-4, Automatic}, {Automatic, 0}},
PrintingCopies->1,
PrintingPageRange->{1, Automatic},
PrintingOptions->{"PaperOrientation"->"Portrait"},
ShowSelection->True,
CellLabelAutoDelete->True,
FrontEndVersion->"7.0 for Mac OS X x86 (32-bit) (January 30, 2009)",
StyleDefinitions->FrontEnd`FileName[{"Creative"}, "NaturalColor.nb", 
  CharacterEncoding -> "UTF-8"]
]
(* End of Notebook Content *)

(* Internal cache information *)
(*CellTagsOutline
CellTagsIndex->{}
*)
(*CellTagsIndex
CellTagsIndex->{}
*)
(*NotebookFileOutline
Notebook[{
Cell[CellGroupData[{
Cell[567, 22, 120, 1, 66, "Title"],
Cell[690, 25, 254, 3, 31, "Subtitle"],
Cell[947, 30, 48, 0, 25, "Subsubtitle"],
Cell[998, 32, 991, 19, 41, "Text"],
Cell[1992, 53, 2084, 39, 45, "Text"],
Cell[CellGroupData[{
Cell[4101, 96, 198, 3, 72, "Section"],
Cell[CellGroupData[{
Cell[4324, 103, 117, 1, 34, "Subsection"],
Cell[4444, 106, 681, 11, 71, "Text"],
Cell[5128, 119, 556, 9, 56, "Text"],
Cell[5687, 130, 844, 13, 86, "Text"],
Cell[6534, 145, 612, 9, 71, "Text"]
}, Open  ]],
Cell[CellGroupData[{
Cell[7183, 159, 110, 1, 34, "Subsection"],
Cell[7296, 162, 1581, 25, 116, "Text"]
}, Open  ]],
Cell[CellGroupData[{
Cell[8914, 192, 124, 1, 34, "Subsection"],
Cell[9041, 195, 349, 7, 26, "Text"],
Cell[9393, 204, 225, 4, 37, "Input"]
}, Open  ]]
}, Open  ]],
Cell[CellGroupData[{
Cell[9667, 214, 280, 4, 72, "Section"],
Cell[9950, 220, 107, 1, 34, "Subsection"],
Cell[10060, 223, 165, 2, 26, "Subsection"],
Cell[10228, 227, 111, 1, 26, "Subsection"]
}, Open  ]],
Cell[CellGroupData[{
Cell[10376, 233, 214, 3, 72, "Section"],
Cell[CellGroupData[{
Cell[10615, 240, 97, 1, 34, "Subsection"],
Cell[10715, 243, 150, 2, 26, "Text"],
Cell[10868, 247, 101, 1, 26, "Text"],
Cell[10972, 250, 111, 1, 26, "Text"]
}, Open  ]],
Cell[CellGroupData[{
Cell[11120, 256, 103, 1, 34, "Subsection"],
Cell[11226, 259, 86, 1, 26, "Text"]
}, Open  ]],
Cell[11327, 263, 100, 1, 34, "Subsection"],
Cell[CellGroupData[{
Cell[11452, 268, 95, 1, 26, "Subsection"],
Cell[11550, 271, 407, 12, 26, "Text"]
}, Open  ]]
}, Open  ]]
}, Open  ]]
}
]
*)

(* End of internal cache information *)
